Examen visión computacional¶


Instrucciones¶


  1. Revisa el dataset que se incluye en las celdas siguientes.
  2. En base a este, crea un clasificador que pueda decir el tipo de coche y su color.
  3. Llena las celdas de código como se va solicitando.
  4. Al terminar, sube el notebook con todas las celdas ejecutadas a canvas.

Imports¶

In [45]:
import numpy as np
import cv2
import os
import math

import torch
import torch.nn.functional as F
from torch import optim, nn, utils, Tensor
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.dataset import random_split
import pandas as pd

import torchmetrics
import torchvision
from torchvision import transforms
from torchvision.datasets import ImageFolder

import matplotlib.pyplot as plt
In [ ]:
# Local settings


# importante cambiar path en caso de querer probar localmente
ABS_PATH_DATASET = "~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars/"

ABS_PATH_DATASET = os.path.expanduser(ABS_PATH_DATASET)

ABS_PATH_MANUAL_TESTING_DATASET = "~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars-manual-testing/"
ABS_PATH_MANUAL_TESTING_DATASET = os.path.expanduser(ABS_PATH_MANUAL_TESTING_DATASET)

cuda_available = torch.cuda.is_available()
mps_available = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()

if cuda_available:
    device = torch.device("cuda")
    accelerator = "gpu"
    devices = 1
elif mps_available:
    device = torch.device("mps")
    accelerator = "mps"
    devices = 1
else:
    device = torch.device("cpu")
    accelerator = "cpu"
    devices = None

pin_memory = True if cuda_available else False

print(f"Using device={device} (cuda={cuda_available}, mps={mps_available}), accelerator={accelerator}, devices={devices}")
Using device=mps (cuda=False, mps=True), accelerator=mps, devices=1

Descarga del dataset¶

In [4]:
!curl -sSL https://www.kaggle.com/api/v1/datasets/download/julichitai/multilabel-small-car-and-color-dataset -o ./cars_multilabel.zip
!unzip -q cars_multilabel.zip -d ~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars
!rm cars_multilabel.zip
In [5]:
!ls $ABS_PATH_DATASET
matiz black matiz red   rio blue    tiggo black tiggo red
matiz blue  rio black   rio red     tiggo blue
In [6]:
!ls $ABS_PATH_DATASET/matiz\ black
000001.jpg
000002.jpg
000003.jpg
000004.jpg
000005.jpg
000006.jpg
000007.jpg
000008.jpg
000009.jpg
000010.jpg
000011.jpg
000012.jpg
000013.jpg
000014.jpg
000015.jpg
000016.jpg
000017.jpg
000018.jpg
000019.jpg
000020.jpg
000021.jpg
000022.jpg
000023.jpg
000024.jpg
000025.jpg
000026.jpg
000027.jpg
000028.jpg
000029.jpg
000030.jpg
000031.jpg
000032.jpg
000033.jpg
000034.jpg
000035.jpg
000036.jpg
000037.jpg
000038.jpg
000039.jpg
000040.jpg
000041.jpg
000042.jpg
000043.jpg
000044.jpg
000045.jpg
000046.jpg
000047.jpg
000048.jpg
000049.jpg
000050.jpg
000051.jpg
000052.jpg
000053.jpg
000054.jpg
000055.jpg
000056.jpg
000057.jpg
000058.jpg
000059.jpg
000060.jpg
000061.jpg
000062.jpg
000063.jpg
000064.jpg
000065.jpg
000066.jpg
000067.jpg
000068.jpg
000070.jpg
000071.jpg
000072.jpg
000073.jpg
000074.jpg
000076.jpg
000077.jpg
000081.jpg
000082.jpg
000083.jpg
000084.jpg
000085.jpg
000094.jpg
000099.jpg
000100.jpg
000101.jpg
000102.jpg
000105.jpg
000108.jpg
000112.jpg
000113.jpg
000125.jpg
000186.jpg
004685d0872defb3755b42a39f374a8e3e96c3d2.png
01cacfae3feffcb3d019f906497242a21c625858.png
030215640ea6a48c30ce6dfec047486daae6be8c.png
0566605b35c9c7e84792a70747c7b593bca97b21.png
05725e7668a1dc5f0b33648fdb6ee8bc.png
076871215f8de1752d0776bece3d547e0c3c2990.png
08065d7ba0fddb3b7b021a43bd55804b1711786a.png
08afc7aee91e68cbe86311340d3752e3.png
0f1f3625316794d1d2f60770e43eeb21f3f75786.png
10ecbd57346e5c638071352bc21c5e29440d03c9.png
13449f0bea2a9cedd905f8e140a2adfb652db7b2.png
169ad3e8bf4e1235df61a47971b47bdc.png
16f9eac46b66fd41e31368a981b6b945308d77c0.png
177a823220a56625542c3a550daec9e2c66c4aef.png
197aa1e3f4d2fc14852d441718df060e.png
1e0086885e7cccf8341658ebdf4b27ba9fd0c355.png
1e6a20a78e76c85a6ad545ede0f1766b9bd7459f.png
1f7747ddbef6302a318315c63ae5d803c794e595.png
203ae8e92d38a1476b4a5a93cd8fb2c416ca3b69.png
2165093b5686b62309c7290e35c86e094696d6fb.png
2215dacb7bec8ec0767e36119e6424be.png
25b4786b9cfe42e3141f1cea900ec962e42de56a.png
28603f16dc931fc35e46f18ba48a6e25c550dbf9.png
2a00000179ed6b0d8ea71aab283545974bcc.png
2a00000179efb6264a37f1edc06412d3ae96.png
2a0000017a006362a784769d35eb81a5a21c.png
2a0000017a0206d0bb48fb123b3fb7da45d1.png
2a0000017a0a9cf8af0cc089f4766d96f1ea.png
2a0000017a0cc9c0e49d437fa30725744c27.png
2a0000017a0e7b63e8158ba07e8b346bbe79.png
2a0000017a193885652f06b4edf4bd95e197.png
2a884d73e53b56fbbc2cea45eb281776.png
2ae07662150bd52cf9c702bd63cdd629.png
2c2bbe596a3cb0e37d32a5f4338ef5b2e61810a1.png
2d000d5899bfe225fccdd1cb32a46318_sr.png
2d31b1411357835256d01d14bd00bb2e13b1b660.png
2d78db60d1e46a087f6a4e174e349bfda5ab5524.png
31edbeeb90b87266abaf54c7b227f52c4fe59713.png
33e78b416574915f162866c795b0436ade360285.png
3498bd8106da059df1bd99e234b13a9d70d4fcd2.png
39d86724b0fb446ee8e61f9fe2882086dae735c1.png
3a92939b32afd6f469a3aecc79e5f1c03ca1d157.png
3b027cca1df1a7806c07881650cc16d5cdddc8bb.png
3f0cad75aa3a35107fa1b04fd60be061.png
3f9d6901551b9b455ef01a4f5ebbdf1e8207fecb.png
3fb9de18637440f89755160ebc4ad1768155c523.png
4036bf3af49db65bea3459e7aab5fd0abb0f7f4f.png
40c43857390029328336cef684d685bb793a0e38.png
42e79ef85f0342ec04e6f2d9acab50ec555edc71.png
4456de86d23932efa1c1296c0096890dd6b7de41.png
44f16a7561b9f3a6a274b96ee501fb1f.png
48dc3046b57d7a7d9f433249972c03080f07f7ab.png
4931d3fd903fcaa6902977af95fd154666c3afe1.png
4b80bb029408554dbd438662d823b9d6a3128e3b.png
4d6dafd1fab82a40661ca3f9c278c4c0.png
4dd80bf9b477eda24dc1e8ffaf87d74c.png
4f64d41fd138ad5f1f4cd59d7cd3aa53a0aeddf9.png
539cd86c6d85544db20300fbf6eb643d69c3738e.png
542c5b7c89bbc79eb62e31424a11bd0b.png
574ad54196dea84fac5d2f8bfd83a2e3f303d576.png
57ab289fb6ee116b24957dede31509ea.png
581babd2d16e09f1533c488d7b2af1a9747c620d.png
59582d8f154dff7c3fe18c7b5c938e26001444a7.png
5ada97601b178f6d9a02e73d204a26c88d439418.png
5c87863079c9be45cb550b2bc91ca18471843a83.png
5d3e4b9ca0ddb7104e8d926d6407072a93b282af.png
5d992544151cba201b43955139d30c22025971fd.png
5da169abb6c889438fec5b1afc5656d6a103f41a.png
5e17b0bf3186e43dc03daef88b77d431.png
5e40729440aa762300da0bcdd11633534a10cc8a.png
5eb45504a687f89f716cb4693011689ccb84b4e0.png
5f381dbae92f5681b09a03241ae948d10811c4eb.png
615cc8de5ad8bac96962915d50d24608.png
632c3ca7787631a249ac3daff256844b496d6ce1.png
66f8bf6c6c67a390e766ab9b5c473d13.png
679b2903a61e0112da05e60b74a8f2a6bc788fb2.png
6a44db030d669ffe655bc9d7e3d583851e361988.png
6c22d7ee08641d80a2c529947e8272758b832f96.png
6fb3a0311eaca9058e958fe50a61ab11.png
7057d30eb94c1d0994301fc32a89104ef0d78085.png
70eaf283c7a1d288c7989a7c2a885fdc2e91997a.png
7862a463b3da7ecae6cc16b197ff6fc22c6ee6bf.png
7a3819bc85e7b49ccc4d4cdb5f454e80c70eafca.png
7ade428414ccc9f77963c00e142b5a3d61ca3618.png
81b1310651ffaaeed79110e9bcc7b7c57c6ad0de.png
841f6f5d04167f922447d8635d99c83f7154fdf1.png
84bdd4c54d3b1a6e80a155bdad1c01d698a2dd21.png
857ffd6ad885fa349bc4615c750ea5c85f0527fc.png
87ae698de4efcce621fcf7450a2ad6ab9bb1f04a.png
89ec405740b7e3ae81f0942bb7f9730c72229b0c.png
8cac04a5ffc67914b8298ba01d6be45d8f5f3b12.png
8df14ce4a45aeadd065170a3578a9be01125d279.png
8e2f9c0b61fcac98b47e4b9575cb3c9883a11c43.png
8f327da21319e36516d5c889a4e0f1a9.png
93b1982ab8920535e71062a8b1cc28256f573563.png
93dcbdc94b57266bac92827c6005b9b2.png
95db7d057be6a1dc2bcdb4b66dc86c1f80f1b846.png
970f595cae2c97f5f279b88509448b1d.png
99edf034674c8151c84be90d19bfec61c6c72255.png
9b87866555f4936846eab149e05458c1bae63656.png
a18a7ba32bcf88d3ef54f5ef54c22dd5eba97c81.png
a26c18fdcfed101758e45d5704a001a6c23a1084.png
a29281de40bcb98752247ae4d1b64482c65481e2.png
a8e4ea0431fc0700f936a86cfd96a7a6.png
ab2214d42bc082f94ea83379c1eec30ad957fb84.png
b296ecf29840e105e9b40464e6c284972c9554a4.png
b51e78365fc3cf6d20fd89f5dc1507ea32baaced.png
b8d6e7c7bd105977732caec334c7a6f8.png
b8de5f970ebd938e545f21f990f8258eee1211c9.png
bcfc567b0804e656b2426d9ce26f2ae9dd6e4c76.png
c2babb71e0f86a264578811e7ac2bfdcb81476f3.png
c377fd92b98bf949e033f875c561afb6ffa1405a.png
c47842153c610d86b23fe0e90ff431d2df0cc908.png
c4be2e0c73f8be8e2e8518ce664b34f6396149ca.png
c70bfac2fa1a97df83b22d61463e57901dec8a79.png
c72536ff1adf24e453190f056b38f9ee35983151.png
cb2050530923bd75daa74e36d8f7430272433132.png
cbf6b173ee2d4a86578d489eba72fefa.png
cdd3d54e9302207f19fcd23014bbc582c82709dd.png
cf18dc1eb49dde59b0d82c7b8f5df22d.png
cf7d8d275d2b51fcf6ddbb51d493acdc46495f2d.png
d06104e169b4d38aaae429a9eb49715a74d39390.png
d14c594e5deeb772d368aa549532205b1a5ca632.png
d18783e81f9e9eaedf4975852c2721e1579eb5a0.png
d6d9e6a194cb813d56da1779ae9582b6df704876.png
d7f4568956271d8a5b095f218ca12020b371a399.png
d8a426fb66d29b713a40a45b3aed8896c984abdc.png
de0d3a8809e650ac901a121cd07edc2be887047b.png
e040bd5a25e20b813016c6a504771a4a81cd6be2.png
e0ef59ae84c809aba913548146e51735b53c463a.png
e3605bb29350f4a65ee33dc027398608256396fe.png
e709958b51b06d63748727d08cb4a6ed34f96429.png
e8cdb685caeff4ef7cf2b7a309aa51cdcac2cda6.png
e922eb2aa27ea4c4cbe93b3ea377885b9540fb6b.png
ea98c79ce8e331d282dff7f44bc32de00f648f5a.png
eb0634e709598052ec962a8e11b4ce531ceb7d28.png
ee096d9172d492e6864e153d80e70f0a96ead074.png
ee752cc92e2067fa92c813219510f7513144e330.png
f10e0850b6edbee34b7ed2d0a676acb8a80b4134.png
f1869e20122a7efa362797ed9fca3718.png
f3d55fb02306f647c109b5db9c10d4d837ba8cde.png
f59c2e05290e91e9a99a9141c3e97903a2660617.png
f772a64533f2e720ac11ee84f27fb259.png
ff407d7b4540186daddb60e3232930c7.png

Visualización de imágenes (5 puntos)¶


TO DO: Visualiza una imagen de cada clase.

In [60]:
num_classes = len(os.listdir(ABS_PATH_DATASET))
print(f"Number of classes in dataset: {num_classes}")

cols = 5
rows = math.ceil(num_classes/cols)

fig, axes = plt.subplots(rows, cols, figsize=(15, 8))
axes = axes.flatten() if num_classes > 1 else [axes]

for i, class_name in enumerate(os.listdir(ABS_PATH_DATASET)):
    class_path = os.path.join(ABS_PATH_DATASET, class_name)
    img_name = os.listdir(class_path)[0]
    img_path = os.path.join(class_path, img_name)
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    axes[i].imshow(img)
    axes[i].set_title(class_name, fontsize=10)
    axes[i].axis('off')

for j in range(i + 1, len(axes)):
    axes[j].axis('off')

plt.show()
Number of classes in dataset: 9
No description has been provided for this image

se pude ver que hay algunos modelos de coches un poco viejos, pero al entrar a inspeccionar manualmente, también vpi que hay modelos recientes, así que se podría decir que en una clase van a caber dos o más tipos de modelos, veremos como maneja esto la red

In [8]:
def analyze_dataset(path):
    class_counts = {}
    total_images = 0

    for class_name in os.listdir(path):
        class_path = os.path.join(path, class_name)
        if not os.path.isdir(class_path):
            continue
        num_images = len([f for f in os.listdir(class_path) if f.lower().endswith(('.png','.jpg','.jpeg','.ppm'))])
        class_counts[class_name] = num_images
        total_images += num_images

    
    average_images_per_class = total_images / len(class_counts)
    for class_name, count in class_counts.items():
        print(f"Class '{class_name}': {count} images")

    
    print(f"\nTotal number of images: {total_images}")
    print(f"Average number of images per class: {average_images_per_class:.2f}")
    print(f"Class with least images: {min(class_counts, key=class_counts.get)} ({min(class_counts.values())} images)")
    print(f"Class with most images: {max(class_counts, key=class_counts.get)} ({max(class_counts.values())} images)")


def sample_image_dims(path):
    files = []
    for class_name in os.listdir(path):
        class_path = os.path.join(path, class_name)
        if os.path.isdir(class_path):
            cls_files = [os.path.join(class_path, f) for f in os.listdir(class_path)]
            files.extend(cls_files)
    
    dims = []
    for img_path in files:
        img = cv2.imread(img_path)
        dims.append((img.shape[1], img.shape[0]))
    
    dims = np.array(dims)
    print(f"\nImage size statistics:")
    print(f"Width: min={dims[:,0].min()}, max={dims[:,0].max()}, mean={dims[:,0].mean():.1f}, std={dims[:,0].std():.1f}")
    print(f"Height: min={dims[:,1].min()}, max={dims[:,1].max()}, mean={dims[:,1].mean():.1f}, std={dims[:,1].std():.1f}")


analyze_dataset(ABS_PATH_DATASET)
sample_image_dims(ABS_PATH_DATASET)
Class 'matiz black': 235 images
Class 'rio black': 306 images
Class 'matiz red': 346 images
Class 'rio red': 431 images
Class 'tiggo black': 286 images
Class 'matiz blue': 334 images
Class 'rio blue': 262 images
Class 'tiggo red': 262 images
Class 'tiggo blue': 273 images

Total number of images: 2735
Average number of images per class: 303.89
Class with least images: matiz black (235 images)
Class with most images: rio red (431 images)

Image size statistics:
Width: min=138, max=5760, mean=686.3, std=442.8
Height: min=124, max=3840, mean=472.9, std=300.7

Se puede ver que las imágenes están bastante balanceadas en cuanto a cantidad en cada clase, mas en tamaño no mucho, varían mucho en tamaño

Carga de imágenes en dataloaders (15 puntos)¶


TO DO: Construye los dataloaders necesarios, con las transformaciónes adecuadas, y muestra un batch. Connstruye las etiquetas de las imágenes para poder hacer clasificación multi-etiqueta.

In [16]:
train_transform = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.RandomCrop((128, 128)),
    transforms.RandomHorizontalFlip(.5),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

test_transform = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.CenterCrop((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

añadí la random rotation ya que en este tipo de imágenes no importa mucho si están espejeadas, de hecho, enriquece al modelo porque es como si el coche estuviera tomado desde otro punto

In [17]:
import lightning as L

class CarDataModule(L.LightningDataModule):
    def __init__(self, data_dir, batch_size=32, num_workers=4, train_transform=None, test_transform=None, pin_memory=False):
        super().__init__()
        self.data_dir = os.path.expanduser(data_dir)
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.train_transform = train_transform
        self.test_transform = test_transform
        self.pin_memory = pin_memory
        self.setup()

    def setup(self, stage=None):
        full_dataset = ImageFolder(self.data_dir, transform=self.train_transform)
        
        total_size = len(full_dataset)
        train_size = int(.7 * total_size)
        val_size = int(.15 * total_size)
        test_size = total_size - train_size - val_size
        
        self.train, self.val, self.test = random_split(
            full_dataset, 
            [train_size, val_size, test_size],
            generator=torch.Generator().manual_seed(42)
        )
        
        self.val.dataset.transform = self.test_transform
        self.test.dataset.transform = self.test_transform
        
        self.num_classes = len(full_dataset.classes)
        self.classes = full_dataset.classes

    def train_dataloader(self):
        return DataLoader(
            self.train,
            batch_size=self.batch_size,
            drop_last=True,
            shuffle=True,
            persistent_workers=True,
            num_workers=self.num_workers,
            pin_memory=self.pin_memory
        )

    def val_dataloader(self):
        return DataLoader(
            self.val,
            batch_size=self.batch_size,
            drop_last=False,
            shuffle=False,
            persistent_workers=True,
            num_workers=self.num_workers,
            pin_memory=self.pin_memory
        )

    def test_dataloader(self):
        return DataLoader(
            self.test,
            batch_size=self.batch_size,
            drop_last=False,
            shuffle=False,
            num_workers=self.num_workers,
            pin_memory=self.pin_memory
        )

Datamodule estándar como le habíamos estado haciendo, hice el split de train/val7test dentro del mismo

In [61]:
dm = CarDataModule(ABS_PATH_DATASET, batch_size=16, train_transform=train_transform, test_transform=test_transform, pin_memory=pin_memory)

loader = dm.train_dataloader()
images, labels = next(iter(loader))

print('Batch shapes -> images:', images.shape, 'labels:', labels.shape)
print(f'Classes: {dm.classes}')
print(f'Number of classes: {dm.num_classes}')

inv = lambda x: x * 0.5 + 0.5

n = images.size(0)
cols = 4
rows = math.ceil(n / cols)

fig, axes = plt.subplots(rows, cols, figsize=(15, 8))
axes = axes.flatten()

for i in range(n):
    img = inv(images[i]).permute(1, 2, 0).cpu().numpy()
    axes[i].imshow(img)
    
    class_name = dm.classes[labels[i].item()]
    
    axes[i].set_title(class_name, fontsize=10)
    axes[i].axis('off')

for j in range(i + 1, len(axes)):
    axes[j].axis('off')

plt.tight_layout()
plt.show()
Batch shapes -> images: torch.Size([16, 3, 128, 128]) labels: torch.Size([16])
Classes: ['matiz black', 'matiz blue', 'matiz red', 'rio black', 'rio blue', 'rio red', 'tiggo black', 'tiggo blue', 'tiggo red']
Number of classes: 9
No description has been provided for this image

Visualización de imágenes de training ya carhadas en el dataloader

Definición del modelo (15 puntos)¶


TO DO: Crea un modelo con capas convolucionales para hacer la clasificación.

In [ ]:
resnet_model = torch.hub.load("pytorch/vision", "resnet18", weights="IMAGENET1K_V1")

for param in resnet_model.parameters():
    param.requires_grad = False

resnet_model.layer4.requires_grad_(True)

resnet_model.fc = torch.nn.Sequential(
    torch.nn.Dropout(0.3),
    torch.nn.Linear(512, dm.num_classes)
)
resnet_model.fc

Se utilizará el modelo de resnet18 ya que he tenido buena experiencia para clasificación multiclase de imágenes en las que no se necesita capturar gran detalle de ellas, por lo que las imágenes pueden ser redimensionadas a una escala no muy grande para lograr una buena velocidad de entrenamiento

In [23]:
class CarModelLightning(L.LightningModule):
    def __init__(self, model, learning_rate, num_classes):
        super().__init__()
        self.learning_rate = learning_rate
        self.model = model
        
        self.train_acc = torchmetrics.Accuracy(task="multiclass", num_classes=num_classes)
        self.val_acc = torchmetrics.Accuracy(task="multiclass", num_classes=num_classes)
        self.test_acc = torchmetrics.Accuracy(task="multiclass", num_classes=num_classes)

    def forward(self, x):
        return self.model(x)

    def _shared_step(self, batch):
        features, true_labels = batch
        logits = self(features)
        
        loss = F.cross_entropy(logits, true_labels)
        predicted_labels = torch.argmax(logits, dim=1)
        return loss, true_labels, predicted_labels

    def training_step(self, batch, batch_idx):
        loss, true_labels, predicted_labels = self._shared_step(batch)
        
        self.log("train_loss", loss)
        self.train_acc(predicted_labels, true_labels)
        self.log("train_acc", self.train_acc, prog_bar=True, on_epoch=True, on_step=False)
        return loss

    def validation_step(self, batch, batch_idx):
        loss, true_labels, predicted_labels = self._shared_step(batch)
        
        self.log("val_loss", loss, prog_bar=True)
        self.val_acc(predicted_labels, true_labels)
        self.log("val_acc", self.val_acc, prog_bar=True)

    def test_step(self, batch, batch_idx):
        with torch.no_grad():
            loss, true_labels, predicted_labels = self._shared_step(batch)
            self.test_acc(predicted_labels, true_labels)
            self.log("test_acc", self.test_acc)

    def configure_optimizers(self):
        optimizer = torch.optim.RMSprop(self.parameters(), lr=self.learning_rate)
        return optimizer

Wrapper del modelo con Lightning, en donde se definen los steps en métodos de la clase abstracta de Lightning

Entrenamiento (5 puntos)¶


TO DO: Entrena el modelo, y muestra los resultados de validación y entrenamiento.

In [24]:
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.loggers import CSVLogger

car_model_lightning = CarModelLightning(
    model=resnet_model,
    learning_rate=0.001,
    num_classes=dm.num_classes
)

print(f"device={device}, accelerator={accelerator}, devices={devices}")

trainer = L.Trainer(
    accelerator=accelerator, 
    devices=devices or 0,
    logger=CSVLogger(save_dir="logs/", name="car-classifier-resnet18"),
    max_epochs=20,
    callbacks=[EarlyStopping(monitor="val_loss", mode="min", patience=5)],
)
💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
device=mps, accelerator=mps, devices=1

Se crea el trainer al que se le pasa el modelo, se configura el logger, y el callback de earlystopping

In [25]:
trainer.fit(model=car_model_lightning, datamodule=dm)
  | Name      | Type               | Params | Mode 
---------------------------------------------------------
0 | model     | ResNet             | 11.2 M | train
1 | train_acc | MulticlassAccuracy | 0      | train
2 | val_acc   | MulticlassAccuracy | 0      | train
3 | test_acc  | MulticlassAccuracy | 0      | train
---------------------------------------------------------
8.4 M     Trainable params
2.8 M     Non-trainable params
11.2 M    Total params
44.725    Total estimated model params size (MB)
73        Modules in train mode
0         Modules in eval mode
Epoch 7: 100%|██████████| 119/119 [00:03<00:00, 36.78it/s, v_num=4, val_loss=0.445, val_acc=0.893, train_acc=0.960]

Se puede ver que el entrenamiento para en la época 7 por la política del earlystopper.

In [27]:
metrics = pd.read_csv(f"{trainer.logger.log_dir}/metrics.csv")
df_epochs = metrics.groupby('epoch').mean()

fig, axes = plt.subplots(1, 2, figsize=(15, 5))

axes[0].set_xlabel('Epochs')
axes[0].set_ylabel('Accuracy')
axes[0].plot(df_epochs['train_acc'], label="Train acc")
axes[0].plot(df_epochs['val_acc'], label="Val acc")
axes[0].set_title("Training and Validation Accuracy")
axes[0].legend(loc='lower right')

axes[1].set_xlabel('Epochs')
axes[1].set_ylabel('Loss')
axes[1].plot(df_epochs['train_loss'], label="Train loss")
axes[1].plot(df_epochs['val_loss'], label="Val loss")
axes[1].set_title("Training and Validation Loss")
axes[1].legend(loc='upper right')

plt.tight_layout()
plt.show()
No description has been provided for this image

Se llegó a una accuracy muy alta, alcanzando el 89% el validación y 96% en training, y también un loss muy bajo.

Considero que el modelo quedó ligeramente overfitteado, ya que hay una diferencia de 7% entre training y validation

Pruebas (5 puntos)¶


TO DO: Prueba el desempeño de tu modelo con datos de prueba.

In [29]:
trainer.test(datamodule=dm)
Restoring states from the checkpoint path at logs/car-classifier-resnet18/version_4/checkpoints/epoch=7-step=952.ckpt
Loaded model weights from the checkpoint at logs/car-classifier-resnet18/version_4/checkpoints/epoch=7-step=952.ckpt
Loaded model weights from the checkpoint at logs/car-classifier-resnet18/version_4/checkpoints/epoch=7-step=952.ckpt
Testing DataLoader 0: 100%|██████████| 26/26 [00:00<00:00, 53.76it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.8613138794898987
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.8613138794898987
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Out[29]:
[{'test_acc': 0.8613138794898987}]

En testing conseguimos una accuracy de 86%, resultado de que el modelo está ligeramente overfitteado, lo ideal sería que ésta sea igual a la de validación

Predicción (5 puntos)¶


TO DO: Descarga una imagen, y haz una predicción sobre ella. Usa wget como en el siguiente ejemplo.

In [ ]:
!wget -O ~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars-manual-testing/matiz_rojo.jpg https://i.pinimg.com/736x/83/90/e7/8390e7fb457d2b87f98483982ebe4a62.jpg

!wget -O ~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars-manual-testing/kia_rio_rojo.jpg https://cdn.pixabay.com/photo/2018/04/09/22/07/car-3305699_1280.jpg

!wget -O ~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars-manual-testing/kia_rio_azul.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/d/d5/KIA_Rio_-_CIAS_2012_%286787393208%29.jpg/640px-KIA_Rio_-_CIAS_2012_%286787393208%29.jpg

!wget -O ~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars-manual-testing/kia_rio_negro.jpeg https://upload.wikimedia.org/wikipedia/commons/thumb/6/60/Kia_YB_Rio_Hatch.jpeg/640px-Kia_YB_Rio_Hatch.jpeg

!wget -O ~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars-manual-testing/tiggo_negro.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/5/5b/Chery_Tiggo_DR_Edition_IMG001.jpg/640px-Chery_Tiggo_DR_Edition_IMG001.jpg

!wget -O ~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars-manual-testing/tiggo_rojo.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/b/ba/Chery_Tiggo_facelift_II_China_2012-05-12.jpg/640px-Chery_Tiggo_facelift_II_China_2012-05-12.jpg

!wget -O ~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars-manual-testing/matiz_negro.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/3/32/Matiz_017.jpg/640px-Matiz_017.jpg


!wget -O ~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars-manual-testing/matiz_azul.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/9/91/Daewoo_Matiz_%28front%29%2C_Denpasar.jpg/640px-Daewoo_Matiz_%28front%29%2C_Denpasar.jpg

Importante cambiar la ruta de destino de las descargas en caso de querer probar de forma local

dato curioso: me banearon de pinterest al estar copiando links de descargas, yo creo que vieron mi actividad sospechosa

In [62]:
manual_test_images = os.listdir(ABS_PATH_MANUAL_TESTING_DATASET)


n_imgs = len(manual_test_images)
cols = 4
rows = math.ceil(n_imgs / cols)

fig, axes = plt.subplots(rows, cols, figsize=(15, 8))
axes = axes.flatten() if n_imgs > 1 else [axes]

for i, img_name in enumerate(manual_test_images):
    img_path = os.path.join(ABS_PATH_MANUAL_TESTING_DATASET, img_name)
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    
    axes[i].imshow(img)
    axes[i].set_title(img_name, fontsize=10)
    axes[i].axis('off')

for j in range(i + 1, len(axes)):
    axes[j].axis('off')

plt.show()
No description has been provided for this image

Imágenes descargadas de internet

In [63]:
from PIL import Image

car_model_lightning.to(device).eval()

n_imgs = len(manual_test_images)
cols = 4
rows = math.ceil(n_imgs / cols)

fig, axes = plt.subplots(rows, cols, figsize=(15, 8))
axes = axes.flatten() if n_imgs > 1 else [axes]

for i, img_name in enumerate(manual_test_images):
    img_path = os.path.join(ABS_PATH_MANUAL_TESTING_DATASET, img_name)

    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    pil_img = Image.fromarray(img)
    img_tensor = test_transform(pil_img).unsqueeze(0).to(device)

    with torch.no_grad():
        logits = car_model_lightning(img_tensor)
        pred_idx = int(logits.argmax(dim=1).cpu().item())
        pred_class = dm.classes[pred_idx]

    axes[i].imshow(img)
    axes[i].set_title(f"Prediction: {pred_class}\nLabel: {img_name}", fontsize=10)
    axes[i].axis('off')

for j in range(i + 1, len(axes)):
    axes[j].axis('off')

plt.tight_layout()
plt.show()
No description has been provided for this image

Probando con las imágenes descargadas de internet, en todos los casos en modelo predijo la clase correcta, ésto se lo atribuyo al dataset, que por ejemplo comparado con el de las señales alemanas, tiene una mejor calidad, distribución más pareja y sobre todo el encuadre de los coches varía mucho y no solo es de un tipo, lo que hace al modelo más versátil y capaz de clasificar fotos más realistas de coches, sin estar en un ambiente controlado